package au.com.acpfg.misc.spectra.quality; import java.io.File; import java.io.IOException; import java.io.Serializable; import java.util.Arrays; import java.util.Comparator; import org.knime.core.data.DataCell; import org.knime.core.data.DataColumnSpec; import org.knime.core.data.DataColumnSpecCreator; import org.knime.core.data.DataRow; import org.knime.core.data.DataTableSpec; import org.knime.core.data.DataType; import org.knime.core.data.RowIterator; import org.knime.core.data.RowKey; import org.knime.core.data.container.CellFactory; import org.knime.core.data.container.ColumnRearranger; import org.knime.core.data.container.SingleCellFactory; import org.knime.core.data.def.DefaultRow; import org.knime.core.data.def.DoubleCell; import org.knime.core.data.def.IntCell; import org.knime.core.data.def.JoinedRow; import org.knime.core.data.def.StringCell; import org.knime.core.node.BufferedDataContainer; import org.knime.core.node.BufferedDataTable; import org.knime.core.node.CanceledExecutionException; import org.knime.core.node.defaultnodesettings.SettingsModelDoubleBounded; import org.knime.core.node.defaultnodesettings.SettingsModelIntegerBounded; import org.knime.core.node.defaultnodesettings.SettingsModelString; import org.knime.core.node.ExecutionContext; import org.knime.core.node.ExecutionMonitor; import org.knime.core.node.InvalidSettingsException; import org.knime.core.node.NodeLogger; import org.knime.core.node.NodeModel; import org.knime.core.node.NodeSettingsRO; import org.knime.core.node.NodeSettingsWO; import au.com.acpfg.misc.spectra.SpectralDataInterface; /** * This is the model implementation of SpectraQualityAssessor. * Implements the 'Xrea' algorithm in the paper entitled "Quality Assessment of Tandem Mass Spectra Based on Cumulative Intensity Normalization" in the journal of proteome research. May implement other algorithms at a future date. * * @author Andrew Cassin */ public class SpectraQualityAssessorNodeModel extends NodeModel { // the logger instance private static final NodeLogger logger = NodeLogger .getLogger(SpectraQualityAssessorNodeModel.class); /** the settings key which is used to retrieve and store the settings (from the dialog or from a settings file) (package visibility to be usable from the dialog). */ static final String CFGKEY_SPECTRA = "spectra-column"; static final String CFGKEY_ADJUSTMENT_THRESHOLD = "xrea-adjustment-threshold"; // 85% TIC threshold before Xrea adjustment takes place static final String CFGKEY_ADJUSTMENT_PEAKS = "xrea-adjustment-peaks"; // no more than 5 peaks must comprise xrea-adjustment-threshold /** initial default count value. */ static final String DEFAULT_SPECTRA = "Spectra"; private final SettingsModelString m_spectra = new SettingsModelString(CFGKEY_SPECTRA, DEFAULT_SPECTRA); private final SettingsModelDoubleBounded m_adj_threshold = new SettingsModelDoubleBounded(CFGKEY_ADJUSTMENT_THRESHOLD, 0.85, 0.0, 1.0); private final SettingsModelIntegerBounded m_adj_peaks = new SettingsModelIntegerBounded(CFGKEY_ADJUSTMENT_PEAKS, 10, 0, 100); /** * Internal data structure -- not saved */ private SortablePeak[] m_peaks; private double[] m_mz; private double[] m_i; private double m_i_max; private double m_tic; private int m_num_peaks; /** * Constructor for the node model. */ protected SpectraQualityAssessorNodeModel() { // one incoming port and one outgoing port super(1, 1); m_peaks = null; } private ColumnRearranger createColumnRearranger(DataTableSpec in) { ColumnRearranger c = new ColumnRearranger(in); final int index = in.findColumnIndex(m_spectra.getStringValue()); DataColumnSpec newColSpec = new DataColumnSpecCreator("Xrea Value", DoubleCell.TYPE).createSpec(); DataColumnSpec bigPeakSpec= new DataColumnSpecCreator("Dominant Peak Adjusted Xrea Value", DoubleCell.TYPE).createSpec(); CellFactory cf = new SingleCellFactory(newColSpec) { public DataCell getCell(DataRow r) { if (index<0) return DataType.getMissingCell(); DataCell c = r.getCell(index); if (c.isMissing() || !(c instanceof SpectralDataInterface)) return DataType.getMissingCell(); SpectralDataInterface spectrum = (SpectralDataInterface) c; // this list is computed here and then thrown away at the end of processing each row for other quality ranking code to use double sum = 0.0; m_mz = spectrum.getMZ(); m_i = spectrum.getIntensity(); m_peaks = new SortablePeak[m_mz.length]; for (int j=0; j<m_mz.length; j++) { m_peaks[j] = new SortablePeak(m_mz[j], m_i[j]); sum += m_i[j]; } Arrays.sort(m_peaks); m_tic = sum; m_num_peaks = spectrum.getNumPeaks(); m_i_max = spectrum.getIntensityMostIntense(); return new DoubleCell(calc_xrea()); } }; CellFactory cf2 = new SingleCellFactory(bigPeakSpec) { public DataCell getCell(DataRow r) { DataCell c = r.getCell(index); if (c.isMissing() || !(c instanceof SpectralDataInterface)) { return DataType.getMissingCell(); } return new DoubleCell(calc_adjusted_xrea()); } }; m_peaks = null; c.append(cf); c.append(cf2); return c; } /* // code from Henry Lam (author of TPP spectrast, ported from C++ by Andrew Cassin) protected double calc_xrea(SpectralDataInterface c) { int n_peaks = c.getNumPeaks(); if (n_peaks < 6) { return (0.0); } double[] mz = c.getMZ(); double[] i = c.getIntensity(); SortablePeak[] ranking = new SortablePeak[mz.length]; double sum = 0.0; for (int j=0; j<mz.length; j++) { ranking[j] = new SortablePeak(mz[j], i[j]); sum += i[j]; } Arrays.sort(ranking, new Comparator() { // descending order public int compare(Object arg0, Object arg1) { SortablePeak a = (SortablePeak) arg0; SortablePeak b = (SortablePeak) arg1; if (a.m_i < b.m_i) return 1; else if (a.m_i > b.m_i) return 0; else { return 0; } } }); double tic = 0.0; for (double d : i) { tic += d; } double slope = tic / i.length; double cumInten = 0.0; double diagonal = 0.0; double xrea = 0.0; double triangle = 0.0; for (int rank = ranking.length - 1; rank >= 0; rank--) { diagonal += slope; cumInten += ranking[rank].getIntensity(); xrea += diagonal - cumInten; triangle += diagonal; } xrea = xrea / triangle; return ((double)(xrea)); }*/ /** * A problem with the Xrea calculation is what happens when only a few peaks dominate more than 90% of the TIC (sum of all intensity). * This code removes the dominant peaks, subject to user configuration, and then computes the adjusted xrea score using the remaining peaks only. * If a spectra does not have dominant peaks, the result will be the same as calc_xrea() */ protected double calc_adjusted_xrea() { assert(m_peaks != null && m_tic >= 0.0); // m_peaks must already be computed by xrea // poor spectra? if (m_peaks.length < 6) return 0.0; // how many peaks to reach x% of tic? int cnt = 0; double cum = 0.0; double threshold = m_adj_threshold.getDoubleValue(); int max_peaks = m_adj_peaks.getIntValue(); for (int i=m_peaks.length-1; i>=0; i--) { cum += m_peaks[i].getIntensity(); if (cum >= threshold * m_tic && cnt <= max_peaks) { // recompute the members with dominant peaks removed and recompute xrea double sum = 0.0; m_num_peaks -= cnt; // eliminate dominant peaks m_i_max = 0.0; m_mz = new double[m_num_peaks]; m_i = new double[m_num_peaks]; SortablePeak[] tmp = new SortablePeak[m_num_peaks]; for (int j=m_num_peaks-1; j>=0; j--) { SortablePeak sp = new SortablePeak(m_peaks[j].getMZ(), m_peaks[j].getIntensity()); m_mz[j]= sp.getMZ(); m_i[j] = sp.getIntensity(); tmp[j] = sp; sum += sp.getIntensity(); if (sp.getIntensity() > m_i_max) { m_i_max = sp.getIntensity(); } } m_peaks = tmp; Arrays.sort(m_peaks); m_tic = sum; // since the members have been adjusted to exclude dominant peaks, this will not be the same as the adjacent column return calc_xrea(); } cnt++; } // no adjustment return calc_xrea(); } protected double calc_xrea() { // spectra with only a few peaks are useless for this calculation... if (m_num_peaks > 5) { assert(m_i.length == m_mz.length); // xrea calculation double[] relative_intensity = new double[m_mz.length]; double sum_so_far = 0.0; double peak_area = 0.0; for (int j=0; j<m_mz.length; j++) { sum_so_far += m_peaks[j].getIntensity(); relative_intensity[j] = sum_so_far / m_tic; peak_area += relative_intensity[j]; //System.err.println(peaks[j].getMZ() + " " + relative_intensity[j]); } int last_intensity = m_mz.length - 1; double triangle_area = 0.5 * m_mz.length * relative_intensity[last_intensity]; double alpha = relative_intensity[last_intensity] - relative_intensity[last_intensity-1]; return ((triangle_area - peak_area) / (triangle_area + alpha)); } return 0.0; // minimum area between curve and diagonal ie. poorest quality spectra } /** * {@inheritDoc} */ @Override protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception { ColumnRearranger c = createColumnRearranger(inData[0].getDataTableSpec()); BufferedDataTable out = exec.createColumnRearrangeTable(inData[0], c, exec); return new BufferedDataTable[] {out}; } /** * {@inheritDoc} */ @Override protected void reset() { } /** * {@inheritDoc} */ @Override protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException { DataColumnSpec c = inSpecs[0].getColumnSpec(m_spectra.getStringValue()); if (c==null || !c.getType().isCompatible(SpectralDataInterface.class)) { throw new InvalidSettingsException("No suitable spectra column found!"); } ColumnRearranger cr = createColumnRearranger(inSpecs[0]); DataTableSpec result = cr.createSpec(); return new DataTableSpec[] {result}; } /** * {@inheritDoc} */ @Override protected void saveSettingsTo(final NodeSettingsWO settings) { m_spectra.saveSettingsTo(settings); m_adj_threshold.saveSettingsTo(settings); m_adj_peaks.saveSettingsTo(settings); } /** * {@inheritDoc} */ @Override protected void loadValidatedSettingsFrom(final NodeSettingsRO settings) throws InvalidSettingsException { m_spectra.loadSettingsFrom(settings); m_adj_threshold.loadSettingsFrom(settings); m_adj_peaks.loadSettingsFrom(settings); } /** * {@inheritDoc} */ @Override protected void validateSettings(final NodeSettingsRO settings) throws InvalidSettingsException { m_spectra.validateSettings(settings); m_adj_threshold.validateSettings(settings); m_adj_peaks.validateSettings(settings); } /** * {@inheritDoc} */ @Override protected void loadInternals(final File internDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException { } /** * {@inheritDoc} */ @Override protected void saveInternals(final File internDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException { } /** * Sorts peaks by ascending intensity (or area if thats what the data contains) * @author andrew.cassin * */ private class SortablePeak implements Comparator, Comparable, Serializable { private double m_mz; private double m_i; public SortablePeak(double mz, double i) { m_mz = mz; m_i = i; } public double getMZ() { return m_mz; } public double getIntensity() { return m_i; } public String toString() { return m_mz + " " + m_i; } @Override public int compare(Object arg0, Object arg1) { SortablePeak a = (SortablePeak) arg0; SortablePeak b = (SortablePeak) arg1; if (a.m_i < b.m_i) return -1; else if (a.m_i > b.m_i) return 1; else { if (a.m_mz < b.m_mz) return -1; else if (a.m_mz > b.m_mz) return 1; // else... return 0; } } @Override public int compareTo(Object arg0) { return compare(this, arg0); } } }